#############################################################################################
# ------------------------------------------------------------------------------------------
# R-replication script for "A Warning on Separation in Multinomial Logistic Models"
# Cook, Scott J., John Niehaus, and Samantha Zuhkle
# 3/10/2018
#
# ----------------------------------------------------------------------------------------
# Script runs various specifications of bias-corrected multinomial logits (Firth-MNL)
# (ie, correcting age, not correcting age, relative risk, not relative risk)
# for the tables in our paper, and creates data-frames with the relevant values. 
#
# For the naive MNL and Binary Logit models, see the Stata .do file. 
#
# ----------------------------------------------------------------------------------------
# Done under R-version 3.3.2 -- "Sincere Pumpkin Patch" 
# R-Studio Version 1.0.136
# Package "brglm2" version 0.1.4
# Package "foreign" version 0.8-69
# Package "nnet" version 7.3-12
# Package "TeachingDemos" version 2.10
#
# Windows 10, 64-bit
#-----------------------------------------------------------------------------------------
###########################################################################################


rm(list =ls())

#setwd("")


#install.packages("foreign")
#install.packages("nnet")
#install.packages("brglm2")
#install.packages("TeachingDemos")
library(foreign)
library(nnet)
library(brglm2)
library(TeachingDemos)

# create log 
txtStart("Warning_RAP_Comment_RLog.txt")

# Load data from Oppenheim, et al. (obtained from online replication files) 
TB_Rdata <- as.data.frame(read.dta("replication data.dta")) 

# Restricts the sample to that used in Oppenheim, et al. 
TB_Rdata <- TB_Rdata[ which(TB_Rdata$p504dantes !=""),] 

# Prepare DV for multinomial models  
TB_Rdata$dv2 <- relevel(TB_Rdata$DV_cap_inddemob_switcher, ref="captured") 


#===================================================================================================
#============================Table 3 in Main Text  =================================================
#===================================================================================================

# Results for bias corrected multinomial logits (FIRTH-MNL), Models 3-5 as report in Table 3 

#========= MODEL 3================================================
biascorrected.firth.results.m3 <- brmultinom(dv2~ join_ec_need*p409dummy  + p104a + yrbirth + p203 + p101, data=TB_Rdata, type = "AS_mean", ref = 1)

#store and rearrange results 
m3.coef=as.data.frame(summary(biascorrected.firth.results.m3)["coefficients"]) 
m3.se=as.data.frame(summary(biascorrected.firth.results.m3)["standard.errors"]) 
m3.coef=as.data.frame(t(m3.coef)) 
m3.se=as.data.frame(t(m3.se)) 
m3.obs=as.data.frame(nrow(fitted(biascorrected.firth.results.m3)))
m3.aic=as.data.frame(summary(biascorrected.firth.results.m3)["AIC"])
m3.quantities=cbind(m3.coef,m3.se) 
colnames(m3.quantities)=c("m3.demob beta", "m3.switch beta", "m3.demob SE", "m3.switch SE") 
m3.quantities=m3.quantities[,c("m3.demob beta", "m3.demob SE", "m3.switch beta", "m3.switch SE")] 
m3.quantities$z.score.demob=abs(m3.quantities$'m3.demob beta'/m3.quantities$'m3.demob SE') 
m3.quantities$z.score.switch=abs(m3.quantities$'m3.switch beta'/m3.quantities$'m3.switch SE')
m3.output <- m3.quantities[c(2,8,3,4,5,6,7,1),]

#print results 
print(m3.output)

#========= MODEL 4================================================

biascorrected.firth.results.m4 <- brmultinom(dv2~ join_ideo*p402g  + p104a + yrbirth + p203 + p101, data=TB_Rdata, type = "AS_mean", ref = 1)

#store and rearrange results 
m4.coef=as.data.frame(summary(biascorrected.firth.results.m4)["coefficients"]) 
m4.se=as.data.frame(summary(biascorrected.firth.results.m4)["standard.errors"]) 
m4.coef=as.data.frame(t(m4.coef)) 
m4.se=as.data.frame(t(m4.se)) 
m4.obs=as.data.frame(nrow(fitted(biascorrected.firth.results.m4)))
m4.aic=as.data.frame(summary(biascorrected.firth.results.m4)["AIC"]) 
m4.quantities=cbind(m4.coef,m4.se) 
colnames(m4.quantities)=c("m4.demob beta", "m4.switch beta", "m4.demob SE", "m4.switch SE") # renaming columns
m4.quantities=m4.quantities[,c("m4.demob beta", "m4.demob SE", "m4.switch beta", "m4.switch SE")] #reordering columns
m4.quantities$z.score.demob=abs(m4.quantities$'m4.demob beta'/m4.quantities$'m4.demob SE') # absolute values
m4.quantities$z.score.switch=abs(m4.quantities$'m4.switch beta'/m4.quantities$'m4.switch SE') #absolute values
m4.output <- m4.quantities[c(2,8,3,4,5,6,7,1),]

#print results 
print(m4.output)


#========= MODEL 5================================================

biascorrected.firth.results.m5 <- brmultinom(dv2~ join_ec_need*p402g  + p104a + yrbirth + p203 + p101, data=TB_Rdata, type = "AS_mean", ref = 1)

#store and rearrange results 
m5.coef=as.data.frame(summary(biascorrected.firth.results.m5)["coefficients"]) 
m5.se=as.data.frame(summary(biascorrected.firth.results.m5)["standard.errors"]) 
m5.coef=as.data.frame(t(m5.coef)) 
m5.se=as.data.frame(t(m5.se)) 
m5.obs=as.data.frame(nrow(fitted(biascorrected.firth.results.m5)))
m5.aic=as.data.frame(summary(biascorrected.firth.results.m5)["AIC"]) 
m5.quantities=cbind(m5.coef,m5.se)
colnames(m5.quantities)=c("m5.demob beta", "m5.switch beta", "m5.demob SE", "m5.switch SE") # renaming columns
m5.quantities=m5.quantities[,c("m5.demob beta", "m5.demob SE", "m5.switch beta", "m5.switch SE")] #reordering columns
m5.quantities$z.score.demob=abs(m5.quantities$'m5.demob beta'/m5.quantities$'m5.demob SE') #absolute values
m5.quantities$z.score.switch=abs(m5.quantities$'m5.switch beta'/m5.quantities$'m5.switch SE')
m5.output <- m5.quantities[c(2,8,3,4,5,6,7,1),]

#print results 
print(m5.output)


#===================================================================================================
#============================ Appendix Results  ====================================================
#===================================================================================================

###############################Table 1#############################################################

# Results for bias corrected (FIRTH-MNL), models 1 & 2, as seen in table 1 of the appendix. 

#======================== Model 1 ===============================================
m1.bias.corrected<- brmultinom(dv2~ join_ideo + p104a + yrbirth + p203 + p101, data=TB_Rdata, type = "AS_mean", ref = 1)

# Store and rearrange results
m1.coef=as.data.frame(summary(m1.bias.corrected)["coefficients"]) 
m1.se=as.data.frame(summary(m1.bias.corrected)["standard.errors"]) 
m1.coef=as.data.frame(t(m1.coef)) 
m1.se=as.data.frame(t(m1.se)) 
m1.obs=nrow(fitted(m1.bias.corrected))
m1.aic=summary(m1.bias.corrected)["AIC"] 
m1.bias.a.nr=as.data.frame(cbind(m1.coef,m1.se,m1.aic, m1.obs)) 
colnames(m1.bias.a.nr)=c("m1.demob beta", "m1.switch beta", "m1.demob SE", "m1.switch SE", "m1.AIC", "m1.N") 
m1.bias.a.nr=m1.bias.a.nr[,c("m1.demob beta", "m1.demob SE", "m1.switch beta", "m1.switch SE", "m1.N", "m1.AIC")] 
m1.bias.a.nr$z.score.demob=abs(m1.bias.a.nr$'m1.demob beta'/m1.bias.a.nr$'m1.demob SE') 
m1.bias.a.nr$z.score.switch=abs(m1.bias.a.nr$'m1.switch beta'/m1.bias.a.nr$'m1.switch SE')
m1.output <- m1.bias.a.nr[c(2,3,4,5,6,1),]

#print results
print(m1.output)

#======================Model 2================================
m2.bias.corrected<- brmultinom(dv2~ join_ec_need + p104a + yrbirth + p203 + p101, data=TB_Rdata, type = "AS_mean", ref = 1)

# Store and rearrange results - THIS DOES NOT MATCH THE SUMMARY COMMAND, SOMETHING WEIRD IS GOING ON 
m2.coef=as.data.frame(summary(m2.bias.corrected)["coefficients"]) 
m2.se=as.data.frame(summary(m2.bias.corrected)["standard.errors"]) 
m2.coef=as.data.frame(t(m2.coef)) 
m2.se=as.data.frame(t(m2.se)) 
m2.obs=nrow(fitted(m2.bias.corrected))
m2.aic=summary(m2.bias.corrected)["AIC"] 
m2.bias.a.nr=as.data.frame(cbind(m2.coef,m2.se,m2.aic, m2.obs)) 
colnames(m2.bias.a.nr)=c("m2.demob beta", "m2.switch beta", "m2.demob SE", "m2.switch SE", "m2.AIC", "m2.N") # renaming columns
m2.bias.a.nr=m2.bias.a.nr[,c("m2.demob beta", "m2.demob SE", "m2.switch beta", "m2.switch SE", "m2.N", "m2.AIC")] #reordering columns
m2.bias.a.nr$z.score.demob=abs(m2.bias.a.nr$'m2.demob beta'/m2.bias.a.nr$'m2.demob SE') #absolute values
m2.bias.a.nr$z.score.switch=abs(m2.bias.a.nr$'m2.switch beta'/m2.bias.a.nr$'m2.switch SE')
m2.output <- m2.bias.a.nr[c(2,3,4,5,6,1),]

#print results
print(m2.output)

###############################Table 2####################################################################################

# Results for bias corrected (Firth-MNL), models 3-5 as seen in table 2 of the appendix. Reported as Risk Ratios.

#============================ Model 3 ===============================================


#obtaining relative risks, and rearranging (models run previously)
m3.rr.coeff=exp(m3.coef)
m3.rr.se = m3.rr.coeff*m3.se
m3.quantities.RRR=cbind(m3.rr.coeff,m3.rr.se) 
m3.quantities.RRR=cbind(m3.quantities.RRR,m3.obs,m3.aic) 
colnames(m3.quantities.RRR)=c("m3.demob beta", "m3.switch beta", "m3.demob SE", "m3.switch SE", "m3.N", "m3.AIC") 
m3.quantities.RRR=m3.quantities.RRR[,c("m3.demob beta", "m3.demob SE", "m3.switch beta", "m3.switch SE", "m3.N", "m3.AIC")]
m3.quantities.RRR$z.score.demob=abs(m3.quantities$'m3.demob beta'/m3.quantities$'m3.demob SE') 
m3.quantities.RRR$z.score.switch=abs(m3.quantities$'m3.switch beta'/m3.quantities$'m3.switch SE') 
m3.risk.output <- m3.quantities.RRR[c(2,8,3,4,5,6,7,1),]


#print results 
print(m3.risk.output)

#======================= Model 4 ================================================

# obtaining relative risks, and rearranging (models run previously)
m4.rr.coeff=exp(m4.coef)
m4.rr.se = m4.rr.coeff*m4.se
m4.quantities.RRR=cbind(m4.rr.coeff,m4.rr.se) 
m4.quantities.RRR=cbind(m4.quantities.RRR,m4.obs,m4.aic) 
colnames(m4.quantities.RRR)=c("m4.demob beta", "m4.switch beta", "m4.demob SE", "m4.switch SE", "m4.N", "m4.AIC") #renaming columns for RRR frame
m4.quantities.RRR=m4.quantities.RRR[,c("m4.demob beta", "m4.demob SE", "m4.switch beta", "m4.switch SE", "m4.N", "m4.AIC")]
m4.quantities.RRR$z.score.demob=abs(m4.quantities$'m4.demob beta'/m4.quantities$'m4.demob SE') # absolute values
m4.quantities.RRR$z.score.switch=abs(m4.quantities$'m4.switch beta'/m4.quantities$'m4.switch SE') #absolute values
m4.risk.output <- m4.quantities.RRR[c(2,8,3,4,5,6,7,1),]

#print results
print(m4.risk.output)

#======================= MODEL 5 ===============================================

# obtaining relative risks, and rearranging results (models run previously)
m5.rr.coeff=exp(m5.coef)
m5.rr.se = m5.rr.coeff*m5.se
m5.quantities.RRR=cbind(m5.rr.coeff,m5.rr.se)
m5.quantities.RRR=cbind(m5.quantities.RRR, m5.obs, m5.aic) 
colnames(m5.quantities.RRR)=c("m5.demob beta", "m5.switch beta", "m5.demob SE", "m5.switch SE", "m5.N", "m5.AIC") 
colnames(m5.quantities.RRR)=c("m5.demob beta", "m5.switch beta", "m5.demob SE", "m5.switch SE", "m5.N", "m5.AIC") 
m5.quantities.RRR=m5.quantities.RRR[,c("m5.demob beta", "m5.demob SE", "m5.switch beta", "m5.switch SE", "m5.N" ,"m5.AIC")]
m5.quantities.RRR$z.score.demob=abs(m5.quantities$'m5.demob beta'/m5.quantities$'m5.demob SE') 
m5.quantities.RRR$z.score.switch=abs(m5.quantities$'m5.switch beta'/m5.quantities$'m5.switch SE')
m5.risk.output <- m5.quantities.RRR[c(2,8,3,4,5,6,7,1),]

#print results
print(m5.risk.output)


############################### Table 3####################################################################################

# Results for bias corrected (Firth-MNL), models 3-5 as seen in table 3 of the appendix. 
# using a new age variable to replace year of birth.


# Generates new variable 'age' to use in place of 'yrbirth' 
TB_Rdata$yrstudy <- 2008
TB_Rdata$ageys <- TB_Rdata$yrstudy-TB_Rdata$yrbirth


#============================== Model 3 ===========================================
biascorrected.firth.results.m3a <- brmultinom(dv2~ join_ec_need*p409dummy  + p104a + ageys + p203 + p101, data=TB_Rdata, type = "AS_mean", ref = 1)

#store and rearrange results
m3a.coef=as.data.frame(summary(biascorrected.firth.results.m3a)["coefficients"]) 
m3a.se=as.data.frame(summary(biascorrected.firth.results.m3a)["standard.errors"]) 
m3a.coef=as.data.frame(t(m3a.coef))
m3a.se=as.data.frame(t(m3a.se)) 
m3a.obs=nrow(fitted(biascorrected.firth.results.m3a))
m3a.aic=summary(biascorrected.firth.results.m3a)["AIC"] 
m3a.bias.corrected.not.age.not.rrr.quantities=as.data.frame(cbind(m3a.coef,m3a.se,m3a.aic, m3a.obs)) 
colnames(m3a.bias.corrected.not.age.not.rrr.quantities)=c("m3a.demob beta", "m3a.switch beta", "m3a.demob SE", "m3a.switch SE", "m3a.AIC", "m3a.N") 
m3a.bias.corrected.not.age.not.rrr.quantities=m3a.bias.corrected.not.age.not.rrr.quantities[,c("m3a.demob beta", "m3a.demob SE", "m3a.switch beta", "m3a.switch SE", "m3a.N", "m3a.AIC")] 
m3a.bias.corrected.not.age.not.rrr.quantities$z.score.demob=abs(m3a.bias.corrected.not.age.not.rrr.quantities$'m3a.demob beta'/m3a.bias.corrected.not.age.not.rrr.quantities$'m3a.demob SE') 
m3a.bias.corrected.not.age.not.rrr.quantities$z.score.switch=abs(m3a.bias.corrected.not.age.not.rrr.quantities$'m3a.switch beta'/m3a.bias.corrected.not.age.not.rrr.quantities$'m3a.switch SE')
m3.not.age.not.risk.output <- m3a.bias.corrected.not.age.not.rrr.quantities[c(2,8,3,4,5,6,7,1),]

#print results
print(m3.not.age.not.risk.output)


#===================================== Model 4 =====================================
biascorrected.firth.results.m4a <- brmultinom(dv2~ join_ideo*p402g  + p104a + ageys + p203 + p101, data=TB_Rdata, type = "AS_mean", ref = 1)

#store and rearrange results
m4a.coef=as.data.frame(summary(biascorrected.firth.results.m4a)["coefficients"]) 
m4a.se=as.data.frame(summary(biascorrected.firth.results.m4a)["standard.errors"]) 
m4a.coef=as.data.frame(t(m4a.coef)) 
m4a.se=as.data.frame(t(m4a.se)) 
m4a.obs=nrow(fitted(biascorrected.firth.results.m4a))
m4a.aic=summary(biascorrected.firth.results.m4a)["AIC"] 
m4a.bias.corrected.not.age.not.rrr.quantities=as.data.frame(cbind(m4a.coef,m4a.se,m4a.aic, m4a.obs)) 
colnames(m4a.bias.corrected.not.age.not.rrr.quantities)=c("m4a.demob beta", "m4a.switch beta", "m4a.demob SE", "m4a.switch SE", "m4a.AIC", "m4a.N") 
m4a.bias.corrected.not.age.not.rrr.quantities=m4a.bias.corrected.not.age.not.rrr.quantities[,c("m4a.demob beta", "m4a.demob SE", "m4a.switch beta", "m4a.switch SE", "m4a.N", "m4a.AIC")] 
m4a.bias.corrected.not.age.not.rrr.quantities$z.score.demob=abs(m4a.bias.corrected.not.age.not.rrr.quantities$'m4a.demob beta'/m4a.bias.corrected.not.age.not.rrr.quantities$'m4a.demob SE') 
m4a.bias.corrected.not.age.not.rrr.quantities$z.score.switch=abs(m4a.bias.corrected.not.age.not.rrr.quantities$'m4a.switch beta'/m4a.bias.corrected.not.age.not.rrr.quantities$'m4a.switch SE')
m4.not.age.not.risk.output <- m4a.bias.corrected.not.age.not.rrr.quantities[c(2,8,3,4,5,6,7,1),]

#print results
print(m4.not.age.not.risk.output)

#===================================== Model 5 =====================================
biascorrected.firth.results.m5a <- brmultinom(dv2~ join_ec_need*p402g  + p104a + ageys + p203 + p101, data=TB_Rdata, type = "AS_mean", ref = 1)

#store and rearrange results
m5a.coef=as.data.frame(summary(biascorrected.firth.results.m5a)["coefficients"]) 
m5a.se=as.data.frame(summary(biascorrected.firth.results.m5a)["standard.errors"]) 
m5a.coef=as.data.frame(t(m5a.coef)) 
m5a.se=as.data.frame(t(m5a.se)) 
m5a.obs=nrow(fitted(biascorrected.firth.results.m5a))
m5a.aic=summary(biascorrected.firth.results.m5a)["AIC"] 
m5a.bias.corrected.not.age.not.rrr.quantities=as.data.frame(cbind(m5a.coef,m5a.se,m5a.aic, m5a.obs)) 
colnames(m5a.bias.corrected.not.age.not.rrr.quantities)=c("m5a.demob beta", "m5a.switch beta", "m5a.demob SE", "m5a.switch SE", "m5a.AIC", "m5a.N") # renaming columns
m5a.bias.corrected.not.age.not.rrr.quantities=m5a.bias.corrected.not.age.not.rrr.quantities[,c("m5a.demob beta", "m5a.demob SE", "m5a.switch beta", "m5a.switch SE", "m5a.N", "m5a.AIC")] #reordering columns
m5a.bias.corrected.not.age.not.rrr.quantities$z.score.demob=abs(m5a.bias.corrected.not.age.not.rrr.quantities$'m5a.demob beta'/m5a.bias.corrected.not.age.not.rrr.quantities$'m5a.demob SE') #absolute values
m5a.bias.corrected.not.age.not.rrr.quantities$z.score.switch=abs(m5a.bias.corrected.not.age.not.rrr.quantities$'m5a.switch beta'/m5a.bias.corrected.not.age.not.rrr.quantities$'m5a.switch SE')
m5.not.age.not.risk.output <- m5a.bias.corrected.not.age.not.rrr.quantities[c(2,8,3,4,5,6,7,1),]

#print results
print(m5.not.age.not.risk.output)




###############################Table 4####################################################################################
# For Table 4 coding see Stata .do file

txtStop()


#---------------------------------------------------------------------------------------------------------------------------

#Following lines can be un-commented to remove constituent data-frames. 

#rm(m1.coef, m1.se, m2.coef, m2.se, m3.aic, m3.coef, m3.obs, m3.se, m3a.coef, m3a.se, m4.aic, m4.coef, m4.obs, 
#   m4.se, m4a.coef, m4a.se, m5.aic, m5.coef, m5.obs, m5.se, m5a.coef, m5a.se)


#============================================ End Script ===================================================================
#===========================================================================================================================
